# Script setup -----------------------------------------------------------
# Remove every (non-hidden) object from the current environment.
# NOTE(review): this also wipes the caller's objects if the script is run via
# source() from a driver script -- confirm that is intended.
rm(list = ls())
# Declare where this script lives relative to the project root so that
# here() resolves project paths consistently.
here::i_am(file.path("code", "08_iv_migration.R"))
library(here)
# Run the shared project configuration.
# NOTE(review): presumably this attaches the packages used below and defines
# `fig_dir` and `my_theme()` -- verify in code/config.R.
source(here("code", "config.R"))

# Load the analysis data set from data/analysis/analysis.parquet.
est_df <- here("data", "analysis", "analysis.parquet") |>
  read_parquet()

# Upper-case names of the eleven former Confederate states. State names in
# the data are upper-cased before being matched against this vector.
confederate_states <- c(
  "TEXAS", "ARKANSAS", "LOUISIANA", "MISSISSIPPI",
  "TENNESSEE", "ALABAMA", "GEORGIA", "FLORIDA",
  "SOUTH CAROLINA", "NORTH CAROLINA", "VIRGINIA"
)

# Build the migration / location outcomes ---------------------------------
# Adds to est_df:
#   confed_state_cards, confed_state_c1930  is the state (in the `_cards` and
#       `_c1930` records respectively) a former Confederate state?
#   moved_region, moved_state, moved_county  does the location differ between
#       the `_cards` and `_c1930` records? (NA if either side is missing)
#   moved_out_confed  defined only for rows whose `_cards` state is
#       Confederate: TRUE if the `_c1930` state is not.
#   moved_to_confed   defined only for rows whose `_cards` state is not
#       Confederate: TRUE if the `_c1930` state is.
#   urban_c1930       recoded to logical (see note on the line itself).
#   num_black_vet_county  sum of vet_combined over all rows sharing the same
#       county_geoid_c1930.
#   share_black_pop_xself, share_blackvet_blackpop_xself  county shares that
#       leave the individual out of numerator and denominator.
est_df <- est_df |>
  mutate(
    # `%in%` never returns NA, so a missing state would silently be coded as
    # "not Confederate" (and e.g. a Confederate-origin man with an unknown
    # 1930 state would count as having moved out). Keep the flag missing so
    # the NA propagates into moved_out_confed / moved_to_confed.
    confed_state_cards = ifelse(
      is.na(state_cards), NA, toupper(state_cards) %in% confederate_states
    ),
    confed_state_c1930 = ifelse(
      is.na(state_c1930), NA, toupper(state_c1930) %in% confederate_states
    ),
    moved_region = region_cards != region_c1930,
    moved_state = state_cards != state_c1930,
    moved_county = county_geoid_cards != county_geoid_c1930,
    moved_out_confed = ifelse(confed_state_cards, !confed_state_c1930, NA),
    moved_to_confed = ifelse(!confed_state_cards, confed_state_c1930, NA),
    # Code 0 becomes NA and code 2 becomes TRUE; every other code is FALSE.
    # NOTE(review): presumably 0 = not available and 2 = urban -- confirm
    # against the codebook of the source variable.
    urban_c1930 = ifelse(urban_c1930 == 0, NA, urban_c1930 == 2)
  ) |>
  group_by(county_geoid_c1930) |>
  # na.rm = TRUE: a single missing veteran flag should not turn the count into
  # NA for every other row in the same county.
  mutate(num_black_vet_county = sum(vet_combined, na.rm = TRUE)) |>
  ungroup() |>
  mutate(
    # Leave-one-out shares: the individual is removed from the county totals.
    # NOTE(review): subtracting 1 from pop_black_county1930 assumes every row
    # is a Black resident of the county -- confirm against the sample
    # definition.
    share_black_pop_xself = (pop_black_county1930 - 1) / (pop_county1930 - 1),
    share_blackvet_blackpop_xself = ifelse(
      vet_combined,
      # A veteran also removes himself from the veteran count.
      (num_black_vet_county - 1) / (pop_black_county1930 - 1),
      num_black_vet_county / (pop_black_county1930 - 1)
    )
  )

# Outcome columns to standardize and use as left-hand sides in the IV
# regressions; the order here also fixes the order of the factor levels
# used when plotting.
location_outcomes <- c(
  "moved_region",
  "moved_state",
  "moved_county",
  "is_naacp_city_c1930",
  "urban_c1930",
  "moved_out_confed",
  "moved_to_confed",
  "share_black_pop_xself",
  "share_blackvet_blackpop_xself"
)

# IV (TSLS) estimates, one regression per standardized outcome ------------
# Formula parts, separated by `|`:
#   1. the outcomes (z-scored copies of `location_outcomes`) on a constant;
#   2. fixed effects, where exemption^married_cards is their interaction;
#   3. first stage: vet_combined instrumented by order_num_serial_norm.
# Standard errors are clustered on serial_num. Each outcome is centred and
# scaled with scale() over all rows of est_df before estimation, and stored
# under the original column name with a `_z` suffix.
iv_res <- feols(
  c(
    moved_region_z, moved_state_z, moved_county_z,
    is_naacp_city_c1930_z, urban_c1930_z,
    moved_out_confed_z, moved_to_confed_z,
    share_black_pop_xself_z, share_blackvet_blackpop_xself_z
  ) ~ 1 |
    birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
      farmer + laborer + farm_laborer |
    vet_combined ~ order_num_serial_norm,
  data = mutate(
    est_df,
    across(
      all_of(location_outcomes),
      \(x) scale(x)[, 1],
      .names = "{.col}_z"
    )
  ),
  cluster = ~serial_num
)

# Coefficient plot: TSLS coefficient on veteran status for each outcome ---
# Tidies every model in `iv_res` into one data frame (`spec` holds the model
# id), maps the ids to readable labels, and draws the base plot without the
# point / interval layers, which are added per output variant.
p <- iv_res |>
  map_dfr(broom::tidy, conf.int = TRUE, .id = "spec") |>
  mutate(
    outcome = factor(
      spec,
      # Assumes the model ids are of the form "lhs: <outcome>_z"; any id that
      # does not match becomes NA.
      levels = paste0("lhs: ", location_outcomes, "_z"),
      labels = c(
        "Moved region", "Moved state", "Moved county", "NAACP city (1930)",
        "Urban area (1930)", "Moved out of Confed. state",
        "Moved to Confed. state", "County share Black (1930)",
        "County share vet. in Black pop. (1930)"
      )
    )
  ) |>
  ggplot(aes(x = estimate, xmin = conf.low, xmax = conf.high, y = outcome)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .7) +
  my_theme() +
  # Zoom with coord_cartesian() rather than xlim(): xlim() sets scale limits
  # and drops any estimate or interval bound outside them, which would remove
  # a whole confidence interval from the figure. Zooming only clips it at the
  # panel edge.
  coord_cartesian(xlim = c(-.6, .5)) +
  labs(
    x = "TSLS coefficient on veteran",
    y = ""
  ) +
  # Reverse the axis so the first outcome is drawn at the top.
  scale_y_discrete(limits = rev)

# Colour variant of the figure: point estimates and confidence intervals
# drawn in "#F8766D", saved under <fig_dir>/color/.
p_color <- p +
  geom_point(colour = "#F8766D", size = 2) +
  geom_linerange(colour = "#F8766D")
ggsave(
  filename = file.path(fig_dir, "color", "migration.pdf"),
  plot = p_color,
  width = 6,
  height = 3.2
)

# Black-and-white variant of the figure: same layers drawn in black, saved
# under <fig_dir>/bw/.
p_bw <- p +
  geom_point(colour = "black", size = 2) +
  geom_linerange(colour = "black")
ggsave(
  filename = file.path(fig_dir, "bw", "migration.pdf"),
  plot = p_bw,
  width = 6,
  height = 3.2
)
